import scrapy
from pprint import pprint
from ..items import BiqugeItem
import re

class BiqugeSpider(scrapy.Spider):
    """Spider that scrapes chapter titles and contents of one novel on bqka.cc.

    Flow: ``parse`` collects chapter links from the book's table of contents,
    then ``parse_page2`` extracts each chapter's title and body text.
    """

    name = "biquge"
    allowed_domains = ["bqka.cc"]
    start_urls = ["https://www.bqka.cc/book/626/"]

    # Level 2: table-of-contents page -> one request per chapter.
    def parse(self, response):
        book_urls = response.xpath(
            "//div[@class='listmain']//dd/a[starts-with(@href,'/book')]/@href"
        ).extract()
        for book_url in book_urls:
            # NOTE: the original created ONE shared BiqugeItem here and passed
            # it via meta to every request. Since callbacks interleave, all of
            # them mutated the same object, so chapter data could be
            # overwritten before the pipeline saw it. Each chapter now gets
            # its own item, built in parse_page2.
            yield scrapy.Request(
                response.urljoin(book_url),  # robust against relative hrefs
                callback=self.parse_page2,
            )

    # Level 3: chapter page -> yield one item with title and cleaned content.
    def parse_page2(self, response):
        item = BiqugeItem()  # fresh item per chapter (fixes shared-state bug)
        raw = ''.join(
            response.xpath("//div[@id='chaptercontent']/text()").extract()
        )
        # The site uses a pair of ideographic spaces (U+3000) as paragraph
        # indent; turn each pair into a newline. Plain str.replace suffices —
        # no regex metacharacters involved.
        item['content'] = raw.replace('\u3000\u3000', '\n').strip('\n')
        item['title'] = ''.join(response.xpath("//h1/text()").extract())
        yield item
